**Generate country-level aggregates from micro file
use "HSSC replication data\Gallup data\cleaned_worldpoll_microdata.dta", clear

*Naming conventions
merge m:1 WP5 using  "HSSC replication data\Public data\country_naming_conventions.dta"
drop if _merge==2
drop _merge
**Un regions
merge m:1 sub_regionname using "HSSC replication data\Public data\un regions matched to subregions.dta", update replace
drop if _merge==2
drop _merge

collapse (first) name3 region incomegroup regioncode regionname sub_regionname country countrycode ///
(count) Nharm=harm_index N_lost_inc=lost_income  (mean) employed  living_worse life_eval out_labor unemployed lab_force  lost_income affected_alot lost_job lost_hours temp_layoff  harm_index ///
Yworry low_life not_enough_food  [aw=WGT], by(WP5)
save "HSSC replication data\Generated data\cleaned_worldpoll_macrodata.dta", replace

*use country level aggregates
use "HSSC replication data\Generated data\cleaned_worldpoll_macrodata.dta", replace

*IHME country-level through March 2021
merge 1:1 countrycode using "HSSC replication data\Public data\IHME_excess_deaths_through_March_2021.dta", update replace
drop if _merge==2
drop _merge

*Oxford country-level through March 2021
merge 1:1 countrycode  using "HSSC replication data\Public data\Oxford_Policy_Data_2020_to_March2021_Cumulative.dta", update replace
drop if _merge==2
drop _merge

*Google country-level through March 2021
merge 1:1 country using "HSSC replication data\Public data\Google_Mobility_Data_March31_2021.dta", update replace
drop if _merge==2
drop _merge

*World Bank gdp per capita
merge 1:1 countrycode using "HSSC replication data\Public data\world_bank_gdp_per_capitaPPP_may2023.dta"
drop if _merge==2
drop _merge

drop if harm==.
set obs 118
replace sub_regionname = "World" in 118

foreach x in Cstringencyindex Ceconomicsupportindex retail_7day IHME_reported_deaths IHME_est_deaths   ///
Yworry not_enough_food living_worse  low_life harm_index temp_layoff lost_job lost_hours lost_income affected_alot {
sum `x' [aw=IHMEtotal_pop]
replace `x'=r(mean) in 118
}

**retail_7day Cstringencyindex Ceconomicsupportindex through March 31 2021

replace IHMEtotal_pop=1 in 118

foreach x in Cstringencyindex Ceconomicsupportindex retail_7day IHME_reported_deaths IHME_est_deaths  ///
Yworry not_enough_food living_worse  low_life harm_index temp_layoff lost_job lost_hours lost_income affected_alot {
egen Z`x'=std(`x')
}

pwcorr gdp_pc_ppp2019 harm lost_job lost_income  Cstringencyindex Ceconomicsupportindex retail_7day IHME_reported_deaths IHME_est_deaths 
pwcorr Cstringencyindex gdp_pc_ppp2019 harm g_retail g_grocery g_park g_transit g_work g_home
pwcorr lost_job harm g_retail g_grocery g_park g_transit g_work g_home


***********
*Supplementary Fig 1--what correlates with stringency--validation
************
**Facebook data on masks and contacts
gen iso_code=countrycode
merge 1:1 iso_code using "HSSC replication data\Public data\global_contact_usage.dta"
drop if _merge==2
drop _merge
replace percent_dc=.46 if iso_code=="USA" /*Gallup Panel data*/

**Flu Data by country
merge 1:1 countrycode using "HSSC replication data\Public data\Flu_2020_change_from_peak_week.dta"
drop if _merge==2
drop _merge

ren ZCstringencyindex SDstringencyindex 
gen logdeaths=log(IHME_reported_deaths) 
label var logdeaths "Log of COVID deaths per 100k"
label var g_retail "Index of visits to retail and restaurants"
label var percent_dc "Percent with non-household contact over 24 hours"
label var CH_AVG_flu "Post-COVID flu cases in peak season relative to previous years"

corr SDstringencyindex  logdeaths
local corr : di %05.2g r(rho) 
twoway scatter (SDstringencyindex  logdeaths) || lfit (SDstringencyindex  logdeaths), ///
ytitle("Stringency index") plotregion(color(white)) graphregion(color(white))  ///
saving(String1) ///
note(r =`corr') title("COVID deaths") legend(order(1 "Observed" 2 "Linear fit")  region(style(none))  rows(1) span) 

corr SDstringencyindex  g_retail
local corr : di %05.2g r(rho) 
twoway scatter (SDstringencyindex  g_retail) || lfit (SDstringencyindex  g_retail), ///
ytitle("Stringency index") plotregion(color(white)) graphregion(color(white))  ///
saving(String2) ///
note(r =`corr') title("Social contact measured by mobility") legend(off) 

corr SDstringencyindex  percent_dc
local corr : di %05.2g r(rho) 
twoway scatter (SDstringencyindex  percent_dc) || lfit (SDstringencyindex  percent_dc), ///
ytitle("Stringency index") plotregion(color(white)) graphregion(color(white))  ///
saving(String3) ///
note(r =`corr') title("Self-reported social contact") legend(off) 

corr SDstringencyindex  CH_AVG_flu if SDstringencyindex!=. & (MISSflu_pos_testCOV<.1 &  MISSflu_pos_testPRE<.1)
local corr : di %05.2g r(rho) 
twoway scatter SDstringencyindex  CH_AVG_flu if SDstringencyindex!=. & (MISSflu_pos_testCOV<.1 &  MISSflu_pos_testPRE<.1) ///
|| lfit SDstringencyindex  CH_AVG_flu if SDstringencyindex!=. & (MISSflu_pos_testCOV<.1 & MISSflu_pos_testPRE<.1), ///
ytitle("Stringency index") plotregion(color(white)) graphregion(color(white))  ///
saving(String4) ///
note(r =`corr') title("Percentage drop in confirmed influenza cases") legend(off) 

grc1leg String1.gph String2.gph String3.gph String4.gph   ///
, imargin(2 2 2 2) row(3) col(2) legendfrom(String1.gph) iscale(*.75) plotregion(color(white)) graphregion(color(white))  
graph export "HSSC replication data\Findings\Stringency_Validation_plot.png", replace


***************
**COLLAPSE by SUB-region****
***********
collapse (count) N=harm (first) regionname ///
(mean) Cstringencyindex Ceconomicsupportindex retail_7day IHME_reported_deaths IHME_est_deaths ///
Yworry not_enough_food living_worse  low_life harm_index temp_layoff lost_job lost_hours lost_income affected_alot  ///
SDstringencyindex ZCeconomicsupportindex Zretail_7day ZIHME_reported_deaths ZIHME_est_deaths ZYworry Znot_enough_food Zliving_worse ///
Zlow_life Zharm_index Ztemp_layoff Zlost_job Zlost_hours Zlost_income Zaffected_alot ///
[aw=IHMEtotal_pop], by(sub_regionname)

order regionname sub_regionname
gsort regionname -N
export delimited regionname sub_regionname ///
Cstringencyindex Ceconomicsupportindex retail_7day IHME_reported_deaths IHME_est_deaths ///
Yworry not_enough_food living_worse  low_life harm_index temp_layoff lost_job lost_hours lost_income affected_alot ///
using "HSSC replication data\Findings\cumulative_summary_data_by_subregion.csv", replace


